Time series modeling#
Show code cell source
import copy
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import shap
import statsmodels.formula.api as smf
import lightgbm as lgb
import re
import os
import optuna
import pytorch_lightning as pl
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler, StaticCovariatesTransformer
from darts.models import TFTModel
from darts.metrics import mae, mape, rmse
from darts.explainability.tft_explainer import TFTExplainer
from darts.utils.likelihood_models import QuantileRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from darts.metrics import mape, mae
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
/home/kuura/anaconda3/envs/nuwats_env/lib/python3.9/site-packages/statsforecast/utils.py:231: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.
"ds": pd.date_range(start="1949-01-01", periods=len(AirPassengers), freq="M"),
The StatsForecast module could not be imported. To enable support for the StatsForecastAutoARIMA, StatsForecastAutoETS and Croston models, please consider installing it.
Show code cell source
# Global experiment configuration.
RANDOM_STATE = 123  # seed shared by the Optuna sampler and TFTModel for reproducibility
N_OPTUNA = 50  # number of Optuna hyperparameter-search trials
N_EPOCHS_FULL = 200  # epochs for the initial full-length training run
N_EPOCHS_OPT = 31 # set after running full epochs and looking at validation loss
Data to time series#
Net migration as the target series. Municipality statistics as past covariates. Municipality name as static covariate.
Show code cell source
# Load the pre-aggregated panel: one row per municipality-year, with the
# net-migration target plus municipality statistics columns (see table below).
df_agg_w_stats = pd.read_csv("data/tft_input.csv", index_col=0)
df_agg_w_stats.head()
| Average age, both sexes_arr | Demographic dependency ratio_arr | Economic dependency ratio_arr | Land area, km²_arr | Population density_arr | Share of Finnish speakers, %_arr | Share of foreign citizens, %_arr | Share of persons aged under 15, %_arr | Share of persons belonging to other religious groups, %_arr | Share of persons belonging to the Evangelical Lutheran Church, %_arr | ... | Share of persons in outer urban area, %_arr | Share of persons in peri-urban area, %_arr | Share of persons in rural areas close to urban areas, %_arr | Share of persons in rural areas, %_arr | Share of persons in rural heartland areas, %_arr | Share of persons in sparsely populated rural areas, %_arr | Share of persons living in the area of birth, %_arr | year | net_migration | municipality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 38.6 | 52.2 | 130.7 | 2.466986 | 54.8 | 99.4 | 0.4 | 18.5 | 1.3 | 88.7 | ... | 0.0 | 2.2 | 45.4 | 96.3 | 1.5 | 0.0 | 48.3 | 1990 | 0.005608 | Akaa |
| 1 | 38.8 | 52.7 | 152.3 | 2.466986 | 54.9 | 99.4 | 0.5 | 18.5 | 1.7 | 88.5 | ... | 0.0 | 2.3 | 45.7 | 96.3 | 1.5 | 0.0 | 48.4 | 1991 | 0.002920 | Akaa |
| 2 | 38.8 | 53.4 | 171.6 | 2.466986 | 55.3 | 99.4 | 0.5 | 18.8 | 1.7 | 88.1 | ... | 0.0 | 2.4 | 45.3 | 96.2 | 1.4 | 0.0 | 48.8 | 1992 | 0.005123 | Akaa |
| 3 | 39.1 | 53.8 | 192.3 | 2.466853 | 55.1 | 99.3 | 0.6 | 18.6 | 1.8 | 87.5 | ... | 0.0 | 2.5 | 45.4 | 96.3 | 1.5 | 0.0 | 48.9 | 1993 | -0.003593 | Akaa |
| 4 | 39.3 | 55.2 | 183.2 | 2.466853 | 54.7 | 99.3 | 0.6 | 18.9 | 1.8 | 87.1 | ... | 0.0 | 2.6 | 45.0 | 96.3 | 1.5 | 0.0 | 48.9 | 1994 | -0.009602 | Akaa |
5 rows × 23 columns
Show code cell source
# Every column that is not a key column (year / target / id) is a covariate.
covariate_cols = [c for c in df_agg_w_stats.columns if c not in {"year", "net_migration", "municipality"}]
Show code cell source
# Build one (target, past-covariates) TimeSeries pair per municipality.
mun_names = list(df_agg_w_stats['municipality'].unique())
all_series = []
for mun in mun_names:
    mun_rows = df_agg_w_stats[df_agg_w_stats['municipality'] == mun].set_index('year')
    # Target series: yearly net migration, with the municipality name
    # attached as a static covariate.
    ts_target = TimeSeries.from_dataframe(
        mun_rows,
        value_cols=['net_migration'],
        static_covariates=pd.DataFrame({'municipality': [mun]})
    )
    # Past-covariate series: this municipality's statistics columns.
    ts_covs = TimeSeries.from_dataframe(mun_rows, value_cols=covariate_cols)
    all_series.append((ts_target, ts_covs))
Show code cell source
# Encode the categorical static covariate (municipality name) numerically,
# as required by the TFT model.
transformer = StaticCovariatesTransformer()
series_encoded = transformer.fit_transform([target for target, _ in all_series])
# Re-pair each encoded target with its original covariate series.
all_series = [
    (encoded, covs)
    for encoded, (_, covs) in zip(series_encoded, all_series)
]
Show code cell source
def train_test_split(series_list, split_year=2015):
    """Split (target, covariates) series pairs at ``split_year``.

    Training pairs are truncated after ``split_year``; validation targets
    start around ``split_year`` while validation covariates are kept at full
    length so they remain available as past covariates during forecasting.

    NOTE(review): this shadows ``sklearn.model_selection.train_test_split``
    imported at the top of the file; a distinct name (e.g. ``split_series``)
    would be safer if the sklearn function is ever needed later.
    """
    # NOTE(review): exact inclusivity of the boundary year depends on darts'
    # drop_after/drop_before semantics — confirm whether train and val
    # overlap at split_year.
    train = [
        (target.drop_after(split_year + 1), covs.drop_after(split_year + 1))
        for target, covs in series_list
    ]
    val = [(target.drop_before(split_year), covs) for target, covs in series_list]
    return train, val
# Year-based split: series up to 2015 for training, the rest for validation.
train_data, val_data = train_test_split(all_series)
Show code cell source
from darts.dataprocessing.transformers import Scaler
from sklearn.preprocessing import StandardScaler

# Standardize the target series (mean 0, std 1), fitting on training data
# only so no validation information leaks into the scaling.
target_scaler = Scaler(scaler=StandardScaler())
train_targets = [target for target, _ in train_data]
val_targets = [target for target, _ in val_data]
train_scaled = target_scaler.fit_transform(train_targets)
val_scaled = target_scaler.transform(val_targets)

# Covariates use darts' default Scaler (min-max per the darts docs — confirm).
covariate_scaler = Scaler()
train_covs_scaled = covariate_scaler.fit_transform([covs for _, covs in train_data])
val_covs_scaled = covariate_scaler.transform([covs for _, covs in val_data])
Show code cell source
# Sanity-check plot: one raw (unscaled) covariate series for municipality index 8.
train_data[8][1].univariate_component(covariate_cols[1]).plot()
<Axes: xlabel='year'>
Show code cell source
# Same covariate after scaling, for visual comparison with the raw series above.
train_covs_scaled[8].univariate_component(covariate_cols[1]).plot()
<Axes: xlabel='year'>
TFT hyperparameter optimization#
Show code cell source
def objective(trial):
    """Optuna objective: train a TFT with sampled hyperparameters and return
    the mean validation MAPE across municipalities (lower is better).

    Uses the module-level scaled train/validation series and RANDOM_STATE.
    """
    # Hyperparameter search space.
    params = {
        'input_chunk_length': trial.suggest_int('input_chunk_length', 3, 5),
        'output_chunk_length': trial.suggest_int('output_chunk_length', 1, 3),
        'hidden_size': trial.suggest_categorical('hidden_size', [32, 64, 128]),
        'lstm_layers': trial.suggest_int('lstm_layers', 1, 3),
        'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64]),
        'num_attention_heads': trial.suggest_int('num_attention_heads', 3, 6),
        'dropout': trial.suggest_float('dropout', 0.01, 0.3),
        'add_relative_index': True,      # lets TFT run without explicit future covariates
        'use_static_covariates': True,   # use the encoded municipality name
        'optimizer_kwargs': {'lr': trial.suggest_float('lr', 1e-3, 1e-2, log=True)},
    }

    # Stop a trial's training early once validation loss stops improving.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=5,
        mode='min'
    )

    model = TFTModel(
        pl_trainer_kwargs={'callbacks': [early_stop]},
        random_state=RANDOM_STATE,
        **params
    )

    model.fit(
        train_scaled,
        past_covariates=train_covs_scaled,
        val_series=val_scaled,
        val_past_covariates=val_covs_scaled,
        epochs=100,
        verbose=True
    )

    # Forecast the full validation horizon for every municipality.
    final_prediction = model.predict(
        n=len(val_scaled[0]),
        series=train_scaled,
        past_covariates=val_covs_scaled
    )

    # BUG FIX: the original computed mape(act, pred) / len(final_prediction)
    # per series and then averaged again, returning mean MAPE scaled down by
    # 1/len(final_prediction). Trial ranking was unaffected (constant factor),
    # but the reported metric was misleading. Compute the plain mean instead.
    # NOTE(review): MAPE on standardized series (values near or below zero)
    # can be unstable — consider MAE on the scaled series; confirm.
    all_mape = [mape(actual, pred) for actual, pred in zip(val_scaled, final_prediction)]
    return sum(all_mape) / len(all_mape)
Show code cell source
# Hyperparameter search: TPE sampler with a fixed seed for reproducibility.
# NOTE(review): the MedianPruner is configured but ``objective`` never calls
# trial.report()/trial.should_prune(), so no trial is ever pruned — either
# wire pruning into the training loop or drop the pruner.
study = optuna.create_study(
direction='minimize',
sampler=optuna.samplers.TPESampler(seed=42),
pruner=optuna.pruners.MedianPruner(
n_startup_trials=5,
n_warmup_steps=10
)
)
# Run up to N_OPTUNA trials, capped at one hour of total wall time.
study.optimize(objective, n_trials=N_OPTUNA, timeout=3600)
[I 2025-05-07 10:42:57,821] A new study created in memory with name: no-name-037edc78-9dfe-49c4-9925-c51ddda65a4b
/home/kuura/anaconda3/envs/nuwats_env/lib/python3.9/site-packages/torch/random.py:187: UserWarning: CUDA reports that you have 2 available devices, and you have used fork_rng without explicitly specifying which devices are being used. For safety, we initialize *every* CUDA device by default, which can be quite slow if you have a lot of CUDAs. If you know that you are only making use of a few CUDA devices, set the environment variable CUDA_VISIBLE_DEVICES or the 'devices' keyword argument of fork_rng with the set of devices you are actually using. For example, if you are using CPU only, set device.upper()_VISIBLE_DEVICES= or devices=[]; if you are using device 0 only, set CUDA_VISIBLE_DEVICES=0 or devices=[0]. To initialize all devices and suppress this warning, set the 'devices' keyword argument to `range(torch.cuda.device_count())`.
warnings.warn(message)
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 8.4 K | train
11 | lstm_decoder | LSTM | 8.4 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.4 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
80.8 K Trainable params
0 Non-trainable params
80.8 K Total params
0.323 Total estimated model params size (MB)
489 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:43:12,856] Trial 0 finished with value: 3.2172628881259113 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 3, 'hidden_size': 32, 'lstm_layers': 1, 'batch_size': 32, 'num_attention_heads': 5, 'dropout': 0.01596950334578271, 'lr': 0.009330606024425666}. Best is trial 0 with value: 3.2172628881259113.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 3.1 K | train
4 | encoder_vsn | _VariableSelectionNetwork | 76.6 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 3.1 K | train
6 | static_context_grn | _GatedResidualNetwork | 66.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 66.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 66.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 66.3 K | train
10 | lstm_encoder | LSTM | 264 K | train
11 | lstm_decoder | LSTM | 264 K | train
12 | post_lstm_gan | _GateAddNorm | 33.3 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 82.7 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 43.3 K | train
15 | post_attn_gan | _GateAddNorm | 33.3 K | train
16 | feed_forward_block | _GatedResidualNetwork | 66.3 K | train
17 | pre_output_gan | _GateAddNorm | 33.3 K | train
18 | output_layer | Linear | 2.2 K | train
------------------------------------------------------------------------------------------------
1.2 M Trainable params
0 Non-trainable params
1.2 M Total params
4.683 Total estimated model params size (MB)
485 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:43:20,748] Trial 1 finished with value: 3.155070106328744 and parameters: {'input_chunk_length': 5, 'output_chunk_length': 1, 'hidden_size': 128, 'lstm_layers': 2, 'batch_size': 64, 'num_attention_heads': 3, 'dropout': 0.09472194807521325, 'lr': 0.0023246728489504354}. Best is trial 1 with value: 3.155070106328744.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 3.1 K | train
4 | encoder_vsn | _VariableSelectionNetwork | 76.6 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 3.1 K | train
6 | static_context_grn | _GatedResidualNetwork | 66.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 66.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 66.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 66.3 K | train
10 | lstm_encoder | LSTM | 132 K | train
11 | lstm_decoder | LSTM | 132 K | train
12 | post_lstm_gan | _GateAddNorm | 33.3 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 82.7 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 37.9 K | train
15 | post_attn_gan | _GateAddNorm | 33.3 K | train
16 | feed_forward_block | _GatedResidualNetwork | 66.3 K | train
17 | pre_output_gan | _GateAddNorm | 33.3 K | train
18 | output_layer | Linear | 2.2 K | train
------------------------------------------------------------------------------------------------
901 K Trainable params
0 Non-trainable params
901 K Total params
3.605 Total estimated model params size (MB)
491 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:43:49,631] Trial 2 finished with value: 3.1524690403560456 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 3, 'hidden_size': 128, 'lstm_layers': 1, 'batch_size': 16, 'num_attention_heads': 6, 'dropout': 0.2900332895916222, 'lr': 0.006432759992849893}. Best is trial 2 with value: 3.1524690403560456.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.4 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
97.7 K Trainable params
0 Non-trainable params
97.7 K Total params
0.391 Total estimated model params size (MB)
489 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:44:09,062] Trial 3 finished with value: 2.8244274197413697 and parameters: {'input_chunk_length': 3, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 32, 'num_attention_heads': 5, 'dropout': 0.10039621206592916, 'lr': 0.0033118298880723835}. Best is trial 3 with value: 2.8244274197413697.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 25.3 K | train
11 | lstm_decoder | LSTM | 25.3 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.6 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
114 K Trainable params
0 Non-trainable params
114 K Total params
0.459 Total estimated model params size (MB)
485 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:44:31,089] Trial 4 finished with value: 2.929273710923364 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 3, 'batch_size': 32, 'num_attention_heads': 3, 'dropout': 0.023115913784056037, 'lr': 0.002115097202168559}. Best is trial 3 with value: 2.8244274197413697.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.3 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
97.7 K Trainable params
0 Non-trainable params
97.7 K Total params
0.391 Total estimated model params size (MB)
491 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:44:50,064] Trial 5 finished with value: 2.7710426061900164 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 32, 'num_attention_heads': 6, 'dropout': 0.23395098309603066, 'lr': 0.0015802131864103887}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 3.1 K | train
4 | encoder_vsn | _VariableSelectionNetwork | 76.6 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 3.1 K | train
6 | static_context_grn | _GatedResidualNetwork | 66.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 66.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 66.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 66.3 K | train
10 | lstm_encoder | LSTM | 132 K | train
11 | lstm_decoder | LSTM | 132 K | train
12 | post_lstm_gan | _GateAddNorm | 33.3 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 82.7 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 38.7 K | train
15 | post_attn_gan | _GateAddNorm | 33.3 K | train
16 | feed_forward_block | _GatedResidualNetwork | 66.3 K | train
17 | pre_output_gan | _GateAddNorm | 33.3 K | train
18 | output_layer | Linear | 2.2 K | train
------------------------------------------------------------------------------------------------
901 K Trainable params
0 Non-trainable params
901 K Total params
3.608 Total estimated model params size (MB)
489 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:44:57,545] Trial 6 finished with value: 3.287841314670448 and parameters: {'input_chunk_length': 3, 'output_chunk_length': 3, 'hidden_size': 128, 'lstm_layers': 1, 'batch_size': 64, 'num_attention_heads': 5, 'dropout': 0.10596042720726825, 'lr': 0.0011575995526672777}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 3.1 K | train
4 | encoder_vsn | _VariableSelectionNetwork | 76.6 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 3.1 K | train
6 | static_context_grn | _GatedResidualNetwork | 66.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 66.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 66.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 66.3 K | train
10 | lstm_encoder | LSTM | 264 K | train
11 | lstm_decoder | LSTM | 264 K | train
12 | post_lstm_gan | _GateAddNorm | 33.3 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 82.7 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 38.7 K | train
15 | post_attn_gan | _GateAddNorm | 33.3 K | train
16 | feed_forward_block | _GatedResidualNetwork | 66.3 K | train
17 | pre_output_gan | _GateAddNorm | 33.3 K | train
18 | output_layer | Linear | 2.2 K | train
------------------------------------------------------------------------------------------------
1.2 M Trainable params
0 Non-trainable params
1.2 M Total params
4.664 Total estimated model params size (MB)
489 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:45:11,492] Trial 7 finished with value: 3.2851217762064557 and parameters: {'input_chunk_length': 3, 'output_chunk_length': 1, 'hidden_size': 128, 'lstm_layers': 2, 'batch_size': 64, 'num_attention_heads': 5, 'dropout': 0.23358048218682267, 'lr': 0.003117422003004632}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 1.6 K | train
4 | encoder_vsn | _VariableSelectionNetwork | 42.8 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 1.6 K | train
6 | static_context_grn | _GatedResidualNetwork | 16.8 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 16.8 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 16.8 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 16.8 K | train
10 | lstm_encoder | LSTM | 66.6 K | train
11 | lstm_decoder | LSTM | 66.6 K | train
12 | post_lstm_gan | _GateAddNorm | 8.4 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 20.9 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 10.9 K | train
15 | post_attn_gan | _GateAddNorm | 8.4 K | train
16 | feed_forward_block | _GatedResidualNetwork | 16.8 K | train
17 | pre_output_gan | _GateAddNorm | 8.4 K | train
18 | output_layer | Linear | 1.1 K | train
------------------------------------------------------------------------------------------------
321 K Trainable params
0 Non-trainable params
321 K Total params
1.285 Total estimated model params size (MB)
485 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:45:17,400] Trial 8 finished with value: 3.182807440936476 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 2, 'hidden_size': 64, 'lstm_layers': 2, 'batch_size': 64, 'num_attention_heads': 3, 'dropout': 0.1290110476803326, 'lr': 0.005695752881519846}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 3.1 K | train
4 | encoder_vsn | _VariableSelectionNetwork | 76.6 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 3.1 K | train
6 | static_context_grn | _GatedResidualNetwork | 66.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 66.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 66.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 66.3 K | train
10 | lstm_encoder | LSTM | 396 K | train
11 | lstm_decoder | LSTM | 396 K | train
12 | post_lstm_gan | _GateAddNorm | 33.3 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 82.7 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 43.3 K | train
15 | post_attn_gan | _GateAddNorm | 33.3 K | train
16 | feed_forward_block | _GatedResidualNetwork | 66.3 K | train
17 | pre_output_gan | _GateAddNorm | 33.3 K | train
18 | output_layer | Linear | 2.2 K | train
------------------------------------------------------------------------------------------------
1.4 M Trainable params
0 Non-trainable params
1.4 M Total params
5.740 Total estimated model params size (MB)
485 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:45:37,197] Trial 9 finished with value: 3.426486448581299 and parameters: {'input_chunk_length': 3, 'output_chunk_length': 1, 'hidden_size': 128, 'lstm_layers': 3, 'batch_size': 32, 'num_attention_heads': 3, 'dropout': 0.26884210956209353, 'lr': 0.003462120997293594}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 1.6 K | train
4 | encoder_vsn | _VariableSelectionNetwork | 42.8 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 1.6 K | train
6 | static_context_grn | _GatedResidualNetwork | 16.8 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 16.8 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 16.8 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 16.8 K | train
10 | lstm_encoder | LSTM | 99.8 K | train
11 | lstm_decoder | LSTM | 99.8 K | train
12 | post_lstm_gan | _GateAddNorm | 8.4 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 20.9 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 9.1 K | train
15 | post_attn_gan | _GateAddNorm | 8.4 K | train
16 | feed_forward_block | _GatedResidualNetwork | 16.8 K | train
17 | pre_output_gan | _GateAddNorm | 8.4 K | train
18 | output_layer | Linear | 1.1 K | train
------------------------------------------------------------------------------------------------
386 K Trainable params
0 Non-trainable params
386 K Total params
1.544 Total estimated model params size (MB)
491 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:46:01,193] Trial 10 finished with value: 3.1468402126438115 and parameters: {'input_chunk_length': 5, 'output_chunk_length': 2, 'hidden_size': 64, 'lstm_layers': 3, 'batch_size': 16, 'num_attention_heads': 6, 'dropout': 0.19976919160351836, 'lr': 0.0010301041800719}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.3 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
97.7 K Trainable params
0 Non-trainable params
97.7 K Total params
0.391 Total estimated model params size (MB)
491 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:46:12,862] Trial 11 finished with value: 3.565093398001894 and parameters: {'input_chunk_length': 3, 'output_chunk_length': 2, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 32, 'num_attention_heads': 6, 'dropout': 0.14974993560236557, 'lr': 0.0017328588897268632}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.6 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
98.0 K Trainable params
0 Non-trainable params
98.0 K Total params
0.392 Total estimated model params size (MB)
487 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:46:44,091] Trial 12 finished with value: 3.057614493309573 and parameters: {'input_chunk_length': 5, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 32, 'num_attention_heads': 4, 'dropout': 0.19069369229797334, 'lr': 0.003810033249853406}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.6 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
98.0 K Trainable params
0 Non-trainable params
98.0 K Total params
0.392 Total estimated model params size (MB)
487 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:47:12,931] Trial 13 finished with value: 3.1211860069441304 and parameters: {'input_chunk_length': 3, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 32, 'num_attention_heads': 4, 'dropout': 0.1912022643285305, 'lr': 0.0015330155375018516}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 8.4 K | train
11 | lstm_decoder | LSTM | 8.4 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.3 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
80.8 K Trainable params
0 Non-trainable params
80.8 K Total params
0.323 Total estimated model params size (MB)
491 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:47:25,673] Trial 14 finished with value: 3.4406769822206957 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 2, 'hidden_size': 32, 'lstm_layers': 1, 'batch_size': 32, 'num_attention_heads': 6, 'dropout': 0.06372532442138444, 'lr': 0.0026063423863597413}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 25.3 K | train
11 | lstm_decoder | LSTM | 25.3 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.4 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
114 K Trainable params
0 Non-trainable params
114 K Total params
0.458 Total estimated model params size (MB)
489 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:47:51,367] Trial 15 finished with value: 3.0449283289581732 and parameters: {'input_chunk_length': 3, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 3, 'batch_size': 32, 'num_attention_heads': 5, 'dropout': 0.23812208281329944, 'lr': 0.001473798250395157}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.6 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
98.0 K Trainable params
0 Non-trainable params
98.0 K Total params
0.392 Total estimated model params size (MB)
487 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:48:16,852] Trial 16 finished with value: 2.87467182922746 and parameters: {'input_chunk_length': 5, 'output_chunk_length': 2, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 16, 'num_attention_heads': 4, 'dropout': 0.052619799247829085, 'lr': 0.004657811282458697}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 1.6 K | train
4 | encoder_vsn | _VariableSelectionNetwork | 42.8 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 1.6 K | train
6 | static_context_grn | _GatedResidualNetwork | 16.8 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 16.8 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 16.8 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 16.8 K | train
10 | lstm_encoder | LSTM | 33.3 K | train
11 | lstm_decoder | LSTM | 33.3 K | train
12 | post_lstm_gan | _GateAddNorm | 8.4 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 20.9 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 9.1 K | train
15 | post_attn_gan | _GateAddNorm | 8.4 K | train
16 | feed_forward_block | _GatedResidualNetwork | 16.8 K | train
17 | pre_output_gan | _GateAddNorm | 8.4 K | train
18 | output_layer | Linear | 1.1 K | train
------------------------------------------------------------------------------------------------
252 K Trainable params
0 Non-trainable params
252 K Total params
1.012 Total estimated model params size (MB)
491 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:48:30,371] Trial 17 finished with value: 3.205775671917319 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 1, 'hidden_size': 64, 'lstm_layers': 1, 'batch_size': 32, 'num_attention_heads': 6, 'dropout': 0.16564303759663873, 'lr': 0.0019280314104731502}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.4 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
97.7 K Trainable params
0 Non-trainable params
97.7 K Total params
0.391 Total estimated model params size (MB)
489 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:48:46,353] Trial 18 finished with value: 3.421342416192015 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 2, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 32, 'num_attention_heads': 5, 'dropout': 0.24054772079203407, 'lr': 0.0028199283295248247}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 25.3 K | train
11 | lstm_decoder | LSTM | 25.3 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.3 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
114 K Trainable params
0 Non-trainable params
114 K Total params
0.458 Total estimated model params size (MB)
491 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:49:21,820] Trial 19 finished with value: 3.6126399645958367 and parameters: {'input_chunk_length': 3, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 3, 'batch_size': 16, 'num_attention_heads': 6, 'dropout': 0.09350320091294194, 'lr': 0.0012307619265611923}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 1.6 K | train
4 | encoder_vsn | _VariableSelectionNetwork | 42.8 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 1.6 K | train
6 | static_context_grn | _GatedResidualNetwork | 16.8 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 16.8 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 16.8 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 16.8 K | train
10 | lstm_encoder | LSTM | 66.6 K | train
11 | lstm_decoder | LSTM | 66.6 K | train
12 | post_lstm_gan | _GateAddNorm | 8.4 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 20.9 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 9.3 K | train
15 | post_attn_gan | _GateAddNorm | 8.4 K | train
16 | feed_forward_block | _GatedResidualNetwork | 16.8 K | train
17 | pre_output_gan | _GateAddNorm | 8.4 K | train
18 | output_layer | Linear | 1.1 K | train
------------------------------------------------------------------------------------------------
319 K Trainable params
0 Non-trainable params
319 K Total params
1.279 Total estimated model params size (MB)
489 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:49:51,510] Trial 20 finished with value: 3.453641231634728 and parameters: {'input_chunk_length': 5, 'output_chunk_length': 2, 'hidden_size': 64, 'lstm_layers': 2, 'batch_size': 32, 'num_attention_heads': 5, 'dropout': 0.13943338245051046, 'lr': 0.004291710243903848}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.6 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
98.0 K Trainable params
0 Non-trainable params
98.0 K Total params
0.392 Total estimated model params size (MB)
487 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:50:17,427] Trial 21 finished with value: 2.8787983367547105 and parameters: {'input_chunk_length': 5, 'output_chunk_length': 2, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 16, 'num_attention_heads': 4, 'dropout': 0.06321440768527387, 'lr': 0.004711267798227085}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.6 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
98.0 K Trainable params
0 Non-trainable params
98.0 K Total params
0.392 Total estimated model params size (MB)
487 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:50:51,098] Trial 22 finished with value: 3.7819583051745207 and parameters: {'input_chunk_length': 5, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 16, 'num_attention_heads': 4, 'dropout': 0.05284660252567328, 'lr': 0.006845212186677167}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.6 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
98.0 K Trainable params
0 Non-trainable params
98.0 K Total params
0.392 Total estimated model params size (MB)
487 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:51:14,877] Trial 23 finished with value: 2.964883529891839 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 2, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 16, 'num_attention_heads': 4, 'dropout': 0.04470317762017736, 'lr': 0.004724334445166646}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.6 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
98.0 K Trainable params
0 Non-trainable params
98.0 K Total params
0.392 Total estimated model params size (MB)
487 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:52:04,980] Trial 24 finished with value: 3.475310766663811 and parameters: {'input_chunk_length': 5, 'output_chunk_length': 3, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 16, 'num_attention_heads': 4, 'dropout': 0.08098984851629673, 'lr': 0.008342599607535939}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 25.3 K | train
11 | lstm_decoder | LSTM | 25.3 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.4 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
114 K Trainable params
0 Non-trainable params
114 K Total params
0.458 Total estimated model params size (MB)
489 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:52:27,072] Trial 25 finished with value: 3.7176705928077998 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 3, 'batch_size': 16, 'num_attention_heads': 5, 'dropout': 0.12077988741928815, 'lr': 0.003458069823209385}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 8.4 K | train
11 | lstm_decoder | LSTM | 8.4 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.6 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
81.1 K Trainable params
0 Non-trainable params
81.1 K Total params
0.324 Total estimated model params size (MB)
487 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:52:39,735] Trial 26 finished with value: 3.39792343434966 and parameters: {'input_chunk_length': 3, 'output_chunk_length': 2, 'hidden_size': 32, 'lstm_layers': 1, 'batch_size': 32, 'num_attention_heads': 4, 'dropout': 0.16742602368993495, 'lr': 0.005318645464436097}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.4 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
97.7 K Trainable params
0 Non-trainable params
97.7 K Total params
0.391 Total estimated model params size (MB)
489 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:53:33,828] Trial 27 finished with value: 3.461220183907488 and parameters: {'input_chunk_length': 5, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 2, 'batch_size': 16, 'num_attention_heads': 5, 'dropout': 0.03491331420911459, 'lr': 0.002443478079658969}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 1.6 K | train
4 | encoder_vsn | _VariableSelectionNetwork | 42.8 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 1.6 K | train
6 | static_context_grn | _GatedResidualNetwork | 16.8 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 16.8 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 16.8 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 16.8 K | train
10 | lstm_encoder | LSTM | 66.6 K | train
11 | lstm_decoder | LSTM | 66.6 K | train
12 | post_lstm_gan | _GateAddNorm | 8.4 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 20.9 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 9.1 K | train
15 | post_attn_gan | _GateAddNorm | 8.4 K | train
16 | feed_forward_block | _GatedResidualNetwork | 16.8 K | train
17 | pre_output_gan | _GateAddNorm | 8.4 K | train
18 | output_layer | Linear | 1.1 K | train
------------------------------------------------------------------------------------------------
319 K Trainable params
0 Non-trainable params
319 K Total params
1.278 Total estimated model params size (MB)
491 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 10:53:41,004] Trial 28 finished with value: 3.316752945632209 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 2, 'hidden_size': 64, 'lstm_layers': 2, 'batch_size': 64, 'num_attention_heads': 6, 'dropout': 0.11558464077031792, 'lr': 0.003896577403050395}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 8.4 K | train
11 | lstm_decoder | LSTM | 8.4 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.4 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
80.8 K Trainable params
0 Non-trainable params
80.8 K Total params
0.323 Total estimated model params size (MB)
489 Modules in train mode
0 Modules in eval mode
IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.
Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 11:01:48,566] Trial 48 finished with value: 3.7201316295290225 and parameters: {'input_chunk_length': 4, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 3, 'batch_size': 16, 'num_attention_heads': 6, 'dropout': 0.2636137033231703, 'lr': 0.006353337457239728}. Best is trial 5 with value: 2.7710426061900164.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 8.4 K | train
11 | lstm_decoder | LSTM | 8.4 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.6 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 561 | train
------------------------------------------------------------------------------------------------
81.1 K Trainable params
0 Non-trainable params
81.1 K Total params
0.324 Total estimated model params size (MB)
487 Modules in train mode
0 Modules in eval mode
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
[I 2025-05-07 11:02:07,287] Trial 49 finished with value: 3.0680749745586384 and parameters: {'input_chunk_length': 3, 'output_chunk_length': 1, 'hidden_size': 32, 'lstm_layers': 1, 'batch_size': 32, 'num_attention_heads': 4, 'dropout': 0.13003934182821503, 'lr': 0.0013823781126266845}. Best is trial 5 with value: 2.7710426061900164.
Show code cell source
# Slice plot: objective value vs. each tuned hyperparameter across all trials
optuna.visualization.plot_slice(study)
Show code cell source
# Best hyperparameter combination found by the Optuna search
study.best_params
{'input_chunk_length': 4,
'output_chunk_length': 1,
'hidden_size': 32,
'lstm_layers': 2,
'batch_size': 32,
'num_attention_heads': 6,
'dropout': 0.23395098309603066,
'lr': 0.0015802131864103887}
Show code cell source
# Split the tuned hyperparameters into model kwargs and optimizer kwargs.
# Copy first: `del` on the object returned by `study.best_params` would
# mutate it in place if the property ever returns the stored dict itself.
_tft_params = dict(study.best_params)
_tft_opt_kwargs = {'lr': _tft_params.pop('lr')}
Larger TFT model#
Show code cell source
def plot_preds(start=None, end=None):
    """Plot training history, validation actuals and forecasts per municipality.

    Parameters
    ----------
    start, end : int, optional
        Slice bounds into the lists of series, selecting which
        municipalities to plot. Either bound may be omitted
        (original required both to be given for slicing to apply).

    Relies on module-level ``train_rs``, ``val_rs``, ``pred_rs`` and
    ``color_list``.
    """
    plt.figure(figsize=(14, 7))
    _t = train_rs
    _v = val_rs
    _p = pred_rs
    # Generalized: slice when EITHER bound is supplied.
    if start is not None or end is not None:
        _t = _t[start:end]
        _v = _v[start:end]
        _p = _p[start:end]
    for idx, (train_t, val_t, pred) in enumerate(zip(_t, _v, _p)):
        # One color per municipality, cycling through the palette
        mun_color = color_list[idx % len(color_list)]
        mun_name = train_t.static_covariates_values()[0][0]
        # Full training history
        train_t.plot(color=mun_color, label=mun_name)
        # Validation actuals
        val_t.plot(color=mun_color, linestyle='-', alpha=0.7, label=mun_name)
        # Forecasts
        pred.plot(color=mun_color, linestyle='--', marker='o')
        # Mark the train/validation split point for this series
        plt.scatter(
            train_t.end_time(),
            train_t.last_value(),
            color='black',
            marker='X',
            s=20,
            zorder=10,
            label='_nolegend_'
        )
    # Time axis is integer years here, not timestamps
    plt.axvline(2018, color='gray', alpha=0.5)
    plt.title("Train/Validation Split with Forecasts", pad=20)
    plt.grid(True, alpha=0.2)
    plt.xticks(list(plt.xticks()[0]) + [2019, 2021, 2023, 2024])
    plt.legend(bbox_to_anchor=(1.05, 1))
Show code cell source
# TensorBoard logger so the loss curves can be inspected after training
logger = TensorBoardLogger("lightning_logs", name="tft")

# Probabilistic TFT using the Optuna-tuned architecture hyperparameters,
# predicting the 5%, 50% and 95% quantiles via quantile regression.
model_tft = TFTModel(
    **_tft_params,
    likelihood=QuantileRegression(quantiles=[0.05, 0.5, 0.95]),
    optimizer_kwargs=_tft_opt_kwargs,
    pl_trainer_kwargs={"logger": logger},
    add_relative_index=True,
    use_static_covariates=True,
    random_state=RANDOM_STATE,
)

# Train for the full epoch budget, validating on the held-out series
model_tft.fit(
    series=train_scaled,
    past_covariates=train_covs_scaled,
    val_series=val_scaled,
    val_past_covariates=val_covs_scaled,
    epochs=N_EPOCHS_FULL,
    verbose=True,
)
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.3 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 99 | train
------------------------------------------------------------------------------------------------
97.2 K Trainable params
0 Non-trainable params
97.2 K Total params
0.389 Total estimated model params size (MB)
491 Modules in train mode
0 Modules in eval mode
`Trainer.fit` stopped: `max_epochs=200` reached.
TFTModel(output_chunk_shift=0, hidden_size=32, lstm_layers=2, num_attention_heads=6, full_attention=False, feed_forward=GatedResidualNetwork, dropout=0.23395098309603066, hidden_continuous_size=8, categorical_embedding_sizes=None, add_relative_index=True, loss_fn=None, likelihood=QuantileRegression(quantiles: Optional[list[float]] = None), norm_type=LayerNorm, use_static_covariates=True, random_state=123, optimizer_kwargs={'lr': 0.0015802131864103887}, pl_trainer_kwargs={'logger': <pytorch_lightning.loggers.tensorboard.TensorBoardLogger object at 0x7b2c08372e50>}, input_chunk_length=4, output_chunk_length=1, batch_size=32)
Train and validation loss#
Show code cell source
# Path to the TensorBoard log directory written during the full training run
log_dir = "lightning_logs/tft/version_0/"

# Locate the event file. os.listdir order is arbitrary and the directory can
# hold several event files after re-runs, so pick the newest one explicitly
# instead of event_files[0]; fail with a clear message if none exist.
event_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir) if f.startswith("events.out")]
if not event_files:
    raise FileNotFoundError(f"no TensorBoard event files found in {log_dir!r}")
event_acc = EventAccumulator(max(event_files, key=os.path.getmtime))
event_acc.Reload()

# Extract the logged loss scalars
train_loss = event_acc.Scalars('train_loss')
val_loss = event_acc.Scalars('val_loss')

# Unpack (step, value) pairs for plotting
train_steps = [x.step for x in train_loss]
train_values = [x.value for x in train_loss]
val_steps = [x.step for x in val_loss]
val_values = [x.value for x in val_loss]

# Plot both curves on a shared step axis
plt.figure(figsize=(10, 6))
plt.plot(train_steps, train_values, label='Train Loss')
plt.plot(val_steps, val_values, label='Validation Loss')
plt.xlabel('Steps (Batches and Epochs)')
plt.ylabel('Loss')
plt.title('Training and Validation Loss Curves')
plt.legend()
plt.show()
Show code cell source
# Convert a step count to an epoch count: ~13000 total steps over
# N_EPOCHS_FULL epochs, with the validation loss presumably bottoming
# out around step 2000 -> ~31 epochs (basis for N_EPOCHS_OPT).
# NOTE(review): verify the 13000/2000 constants against the loss plot.
(N_EPOCHS_FULL/ 13000) * 2000
30.76923076923077
Final TFT model#
Show code cell source
# Retrain the tuned TFT from scratch, stopping at the epoch count chosen
# from the validation-loss curve of the longer run above.
model_tft_small = TFTModel(
    **_tft_params,
    likelihood=QuantileRegression(quantiles=[0.05, 0.5, 0.95]),
    optimizer_kwargs=_tft_opt_kwargs,
    add_relative_index=True,
    use_static_covariates=True,
    random_state=RANDOM_STATE,
)

model_tft_small.fit(
    series=train_scaled,
    past_covariates=train_covs_scaled,
    val_series=val_scaled,
    val_past_covariates=val_covs_scaled,
    epochs=N_EPOCHS_OPT,
    verbose=True,
)
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
| Name | Type | Params | Mode
------------------------------------------------------------------------------------------------
0 | train_metrics | MetricCollection | 0 | train
1 | val_metrics | MetricCollection | 0 | train
2 | input_embeddings | _MultiEmbedding | 0 | train
3 | static_covariates_vsn | _VariableSelectionNetwork | 896 | train
4 | encoder_vsn | _VariableSelectionNetwork | 25.9 K | train
5 | decoder_vsn | _VariableSelectionNetwork | 896 | train
6 | static_context_grn | _GatedResidualNetwork | 4.3 K | train
7 | static_context_hidden_encoder_grn | _GatedResidualNetwork | 4.3 K | train
8 | static_context_cell_encoder_grn | _GatedResidualNetwork | 4.3 K | train
9 | static_context_enrichment | _GatedResidualNetwork | 4.3 K | train
10 | lstm_encoder | LSTM | 16.9 K | train
11 | lstm_decoder | LSTM | 16.9 K | train
12 | post_lstm_gan | _GateAddNorm | 2.2 K | train
13 | static_enrichment_grn | _GatedResidualNetwork | 5.3 K | train
14 | multihead_attn | _InterpretableMultiHeadAttention | 2.3 K | train
15 | post_attn_gan | _GateAddNorm | 2.2 K | train
16 | feed_forward_block | _GatedResidualNetwork | 4.3 K | train
17 | pre_output_gan | _GateAddNorm | 2.2 K | train
18 | output_layer | Linear | 99 | train
------------------------------------------------------------------------------------------------
97.2 K Trainable params
0 Non-trainable params
97.2 K Total params
0.389 Total estimated model params size (MB)
491 Modules in train mode
0 Modules in eval mode
`Trainer.fit` stopped: `max_epochs=31` reached.
TFTModel(output_chunk_shift=0, hidden_size=32, lstm_layers=2, num_attention_heads=6, full_attention=False, feed_forward=GatedResidualNetwork, dropout=0.23395098309603066, hidden_continuous_size=8, categorical_embedding_sizes=None, add_relative_index=True, loss_fn=None, likelihood=QuantileRegression(quantiles: Optional[list[float]] = None), norm_type=LayerNorm, use_static_covariates=True, random_state=123, optimizer_kwargs={'lr': 0.0015802131864103887}, input_chunk_length=4, output_chunk_length=1, batch_size=32)
Show code cell source
# Forecast the whole validation horizon for every municipality.
# 200 samples from the quantile-regression likelihood yield a
# probabilistic forecast.
horizon = len(val_scaled[0])
final_prediction = model_tft_small.predict(
    n=horizon,
    series=train_scaled,
    past_covariates=val_covs_scaled,
    num_samples=200,
)
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Show code cell source
# Map the scaled series back to the original net-migration scale
pred_rs = target_scaler.inverse_transform(final_prediction)
val_rs = target_scaler.inverse_transform(val_scaled)
train_rs = target_scaler.inverse_transform(train_scaled)
Plot predictions#
Show code cell source
# get list of the larger municipalities
# Raw municipality population and migration tables (Statistics Finland)
mun_stats = pd.read_csv("data/statfin_vaerak_pxt_11ra_municipalities.csv", index_col=0)
mun_migri = pd.read_csv("data/statfin_muutl_pxt_11a1_municipalities.csv", index_col=0)
# Pivot the year-end population rows into a (year x municipality) table and
# summarise with describe() so each municipality's minimum population over
# the observed years is available below.
pop_stats = \
(mun_stats
# keep only the year-end population rows
.loc[mun_stats["Information"] == "Population 31 Dec", :]
.melt(
id_vars=["Area", "Information"],
var_name="year",
value_name="Population 31 Dec")
.drop(columns=["Information"])
.pivot(columns="Area", index="year")
# .astype('int')
.describe())
# Municipalities whose population never dropped below 10 000 in any year
muns_of_interest = list(pop_stats.loc[:, (pop_stats.loc[["min"]] > 10000).values[0]].columns.droplevel())
# muns_of_interest = pop_stats.loc[["min"]].transpose().reset_index().sort_values(by="min")[-10:]["Area"].values
muns_of_interest = set(muns_of_interest)
# Fixed palette reused across plots so each series keeps a stable color
color_list = [
'#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
'#9467bd', '#8c564b', '#e377c2', '#7f7f7f',
'#bcbd22', '#17becf', '#aec7e8', '#ffbb78'
]
Show code cell source
# One figure per municipality of interest: plot a single series (index idx)
# with its history, validation actuals and forecast.
for idx in range(len(muns_of_interest)):
    plot_preds(start=idx, end=idx + 1)
/tmp/ipykernel_51916/2932993500.py:2: RuntimeWarning:
More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`.
/tmp/ipykernel_51916/2932993500.py:46: UserWarning:
No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
Show code cell source
# Close the current figure to quiet matplotlib's "too many open figures"
# warning after the per-municipality plotting loop
plt.close()
TFT explainer#
Show code cell source
from darts.explainability import TFTExplainer
import pandas as pd

# Fit the explainer with the training data as background context
explainer = TFTExplainer(
    model_tft_small,
    background_series=train_scaled,
    background_past_covariates=train_covs_scaled,
)

# Explain the validation series one at a time, pairing each target series
# with its past covariates, and collect the per-series results.
all_results = []
for i, (fg_series, fg_covs) in enumerate(zip(val_scaled, val_covs_scaled)):
    explanation = explainer.explain(
        foreground_series=fg_series,
        foreground_past_covariates=fg_covs,
    )
    all_results.append(explanation)
    print(f"Explained series {i+1}/{len(val_scaled)}")
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 1/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 2/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 3/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 4/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 5/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 6/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 7/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 8/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 9/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 10/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 11/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 12/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 13/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 14/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 15/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 16/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 17/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 18/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 19/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 20/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 21/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 22/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 23/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 24/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 25/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 26/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 27/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 28/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 29/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 30/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 31/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 32/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 33/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 34/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 35/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 36/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 37/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 38/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 39/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 40/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 41/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 42/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 43/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 44/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 45/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 46/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 47/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 48/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 49/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 50/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 51/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 52/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 53/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 54/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 55/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 56/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 57/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 58/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 59/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 60/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 61/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 62/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 63/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 64/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 65/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 66/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 67/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 68/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 69/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 70/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 71/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 72/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 73/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 74/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 75/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 76/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 77/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 78/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 79/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 80/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 81/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 82/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 83/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 84/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 85/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 86/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 87/89
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Explained series 88/89
Explained series 89/89
Show code cell source
import matplotlib.pyplot as plt
def plot_multi_panel_encoder_importance(encoder_importances, n_cols=2, fig_width=20, names=None):
    """Plot encoder feature importances for multiple series in a grid layout.

    Parameters
    ----------
    encoder_importances : list of pandas.DataFrame
        One encoder-importance frame per series (as returned by the TFT
        explainability results' ``get_encoder_importance()``).
    n_cols : int, default 2
        Number of subplot columns.
    fig_width : float, default 20
        Total figure width in inches; each subplot row adds 5 inches of height.
    names : sequence of str, optional
        Per-panel titles. When omitted (or empty), panels are titled
        "Municipality <i>".

    Returns
    -------
    matplotlib.figure.Figure
        The assembled multi-panel figure.
    """
    n_series = len(encoder_importances)
    n_rows = (n_series + n_cols - 1) // n_cols  # ceiling division
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(fig_width, n_rows * 5))
    # Fix: plt.subplots returns a bare Axes (no .flatten()) when the grid is
    # 1x1; np.atleast_1d makes the single-series case work too.
    axes = np.atleast_1d(axes).flatten()
    for idx, (imp_df, ax) in enumerate(zip(encoder_importances, axes)):
        imp_df.T.plot.barh(ax=ax)
        if not names:
            ax.set_title(f'Municipality {idx+1}')
        else:
            ax.set_title(names[idx])
        ax.set_xlabel('Importance (%)')
    # Hide any unused trailing panels in the grid.
    for j in range(n_series, len(axes)):
        axes[j].axis('off')
    plt.tight_layout()
    return fig
# Pull one encoder-importance frame out of each explainability result,
# then draw them all on a single multi-panel figure titled by municipality.
encoder_importances_list = [
    res.get_encoder_importance() for res in all_results
]
fig = plot_multi_panel_encoder_importance(
    encoder_importances_list,
    names=mun_names,
)
plt.show()
Show code cell source
# Stack the per-series encoder importances into one table
# (rows = municipalities, columns = covariates). Columns are sorted
# alphabetically in every frame so they line up before concatenation.
df_list = [
    pd.DataFrame(r.get_encoder_importance()).sort_index(axis=1)
    for r in all_results
]
df_res = pd.concat(df_list, axis=0)
df_res.index = mun_names
Show code cell source
# Heatmap of encoder importances (rows = municipalities, columns = covariates)
# with a compact horizontal colorbar placed manually below the plot.
data = df_res  # full table; a column slice such as .iloc[:, -10:-1] can be substituted
# Tall figure so all municipality rows stay readable.
fig = plt.figure(figsize=(6, 20))
# Create a GridSpec with 2 rows:
# - big row for heatmap
# - small row for colorbar
gs = gridspec.GridSpec(2, 1, height_ratios=[20, 1], hspace=0.3)
# Heatmap axis (big tall one)
ax_heatmap = fig.add_subplot(gs[0])
# Plot heatmap without the default colorbar; it is drawn manually below so
# its size and position can be controlled independently of the heatmap.
sns.heatmap(data, cmap="cividis", cbar=False, ax=ax_heatmap)
ax_heatmap.grid(False)
plt.sca(ax_heatmap)
plt.xticks(rotation=45, ha='right', rotation_mode='anchor')
# Colorbar axis (small horizontal below heatmap)
# We'll create a smaller axis inside the bottom GridSpec row, aligned right
# For that, create an inset axis inside gs[1] with width ~40% of figure width, aligned right
# Get position of gs[1] in figure coordinates
pos = gs[1].get_position(fig)
# Define width and height for colorbar axis (in figure fraction)
cbar_width = 0.4 * (pos.x1 - pos.x0)
cbar_height = 0.2 * (pos.y1 - pos.y0)
# Position colorbar axis at bottom right inside gs[1]
cbar_left = pos.x1 - cbar_width  # align right
cbar_bottom = pos.y0 * 1.5  # lift slightly above the figure's bottom edge
cbar_ax = fig.add_axes([cbar_left, cbar_bottom, cbar_width, cbar_height])
# Create ScalarMappable for the colorbar, normalized over the full data range
# so the manual colorbar matches the heatmap's color mapping.
norm = plt.Normalize(vmin=data.values.min(), vmax=data.values.max())
sm = plt.cm.ScalarMappable(cmap="cividis", norm=norm)
sm.set_array([])
# Draw horizontal colorbar
cbar = fig.colorbar(sm, cax=cbar_ax, orientation='horizontal')
cbar.set_label('Importance (%)')
cbar.ax.tick_params(labelsize=10)
# Save before show() so the rendered figure is written to disk.
plt.savefig("images/enc-importance.png")
plt.show()
Show code cell source
# Hierarchically clustered version of the importance heatmap (Ward linkage
# on both rows and columns), again with a manually placed horizontal colorbar.
data = df_res  # full table; a column slice such as .iloc[:, -10:-1] can be substituted
data = np.log1p(data)  # log(1 + x) compresses the right tail for better visualization
# Create clustermap without the default colorbar
g = sns.clustermap(
    data,
    cmap="cividis",
    figsize=(6, 20),
    row_cluster=True,
    col_cluster=True,
    cbar_pos=None,  # Disable default colorbar; drawn manually below
    dendrogram_ratio=(0.25, 0.1),  # adjust dendrogram size if needed
    linewidths=0.0,
    method='ward'
)
g.ax_heatmap.grid(False)
# Get the color limits from the heatmap (data2d is the row/column-reordered data)
vmin, vmax = g.data2d.min().min(), g.data2d.max().max()
# Create a new axis for the colorbar below the heatmap
# Position it relative to the clustermap figure
fig = g.fig
cbar_ax = fig.add_axes([0.5, -0.01, 0.4, 0.01])  # [left, bottom, width, height] in figure coords
# Create ScalarMappable for colorbar so it matches the clustermap's mapping
norm = plt.Normalize(vmin=vmin, vmax=vmax)
sm = plt.cm.ScalarMappable(cmap="cividis", norm=norm)
sm.set_array([])
# Draw horizontal colorbar in the new axis
cbar = fig.colorbar(sm, cax=cbar_ax, orientation='horizontal')
cbar.set_label('Importance (log(1 + %))')
cbar.ax.tick_params(labelsize=10)
plt.savefig("images/enc-importance-hclust.png")
plt.show()
Show code cell source
from darts.metrics import mape  # NOTE(review): redundant — mape is already imported at file top
# Shared arguments for backtesting on the scaled training series:
# start forecasting 20% into each series, predict 3 steps ahead, and
# advance the forecast origin one step at a time.
hfc_params = {
    "series": train_scaled,
    "start": 0.2,
    "forecast_horizon": 3,
    "stride": 1,
    "verbose": True,
}
# Run historical forecasts.
# retrain=False reuses the already-fitted TFT weights at every origin;
# last_points_only=True keeps only the final point of each 3-step forecast,
# yielding one continuous backtest series per municipality.
historical_forecasts = model_tft_small.historical_forecasts(
    past_covariates=val_covs_scaled,
    last_points_only=True,
    retrain=False,
    **hfc_params
)
`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` future values of your `past_covariates` (relative to the first predicted time step). To hide this warning, set `show_warnings=False`.
Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Show code cell source
plt.close()
# Plot results: observed (scaled) net migration vs. backtest forecasts,
# one panel per municipality. A 45x2 grid gives 90 panels, enough for the
# 89 explained series.
fig, ax = plt.subplots(45, 2, figsize=(10, 40))
axes = ax.flatten()
# NOTE(review): range(len(...) - 1) skips the final municipality — confirm
# whether dropping the last series is intentional.
for idx in range(len(muns_of_interest)-1):
    train_scaled[idx].plot(label="data", ax=axes[idx])
    historical_forecasts[idx].plot(label="backtest forecasts", ax=axes[idx])
    # print(f"MAPE = {mape(train_scaled[idx], historical_forecasts[idx]):.2f}%")